/************************************  

DESCRIPTION: Create the full sample for the OLS and blocking regressions.
We want to limit the data to non-ESRD patients over age 64 who have the full suite of controls.

Datasets used:
 (1) fullListAnalysis2005-2012

Datasets created:
 (1) LPManalysis
 (2) maleLPManalysis2
 (3) femLPManalysis2
 
 
************************************/  

* ---- Session setup: close any open log, start from empty memory ----
capture log close
clear all
set more off

set matsize 10000

* ---- Project directories ----
* Every path is spelled relative to the origData root so the drive or
* project folder only has to be changed in one place.
global origData "N:\MedicareClaims-P045601-BE"
global dataIn   "$origData\Work\hosp_retro\health_out\Data-In\"
global dataOut  "$origData\Work\hosp_retro\health_out\Data-Out"
global dataProp "$dataOut\PropScore\Patients"
global logs     "$origData\Work\hosp_retro\health_out\Logs\PropScore\Patients"
global dpath    "$origData\Work\ay_data"
global skapath  "$origData\Work\ska"
* Declared with no value: this clears any earlier definition of output.
global output

* ---- User-written packages (appended to the ado search path in this order) ----
foreach pkg in "estout" "outreg2" "reghdfe-master/package" {
	adopath + "N:/SIL-Common/`pkg'"
}

log using "$logs/1LPMdata.log", replace
 
*These are 2005 means to provide some sense of the importance of specialties 
*spec69	0.0622442 lab
*spec49	0.0118779 ASC
*spec59	0.0235907 ambulance

*spec35	0.0195185 chiropractor
*spec43	0.0146038 nurse anesthetist
*spec50	0.0223126 nurse practitioner
*spec97	0.0150443 physician assistant

*spec16	0.0158466 obgyn
*spec04	0.0162783 allergy
*spec66	0.0103838 rheumatology
*spec39	0.0141979 nephrology
*spec83	0.0142171 hematology
*spec29	0.0201113 pulmonary
*spec13	0.0203456 neurology 
*spec26	0.0220521 psychiatry
*spec41	0.0232295 optometry
*spec10	0.0247395 gastroenterology
*spec02	0.0255002 general surgery
*spec22	0.0266265 pathology
*spec05	0.0269228 anesthesiology
*spec34	0.027834 urology
*spec07	0.0322283 dermatology  
*spec20	0.0354502 orthopedic surgery
*spec48	0.0567119 podiatry
*spec18	0.0726766 ophthalmology
*spec06	0.0917368 cardiologist
*spec30	0.1407671 diagnostic radiology

*spec01	0.0242628 fmp
*spec93	0.0532554 fmp
*spec08	0.1419254 fmp
*spec11	0.2011499 fmp
 
	*POTENTIAL CONTROLS FROM THE PAPER'S MEANS TABLE - dependent and merger variables are defined on this.
	* Load the analysis file. The use command accepts the option clear, not
	* replace; with replace Stata rejects the option and the do-file stops.
	use "$dataOut/fullListAnalysis2005-2012.dta", clear

	*age categories
	* p_age is age top-coded at 95. Stata ranks missing above every number,
	* so an unguarded "age>=95" would turn a missing age into 95 (and then
	* into the oldest category). Missing ages are excluded and stay missing.
	 gen p_age=age
	 replace p_age=95 if age>=95 & !missing(age)

	* agecat: 1 = 65-69, 2 = 70-74, 3 = 75-79, 4 = 80-84, 5 = 85-89,
	* 6 = 90-94, 7 = 95 and over. Left missing below 65 or when age is missing.
		gen agecat=1 if  p_age>=65 & p_age<70
		 replace agecat=2 if p_age>=70 & p_age<75
		 replace agecat=3 if p_age>=75 & p_age<80
		 replace agecat=4 if p_age>=80 & p_age<85
		 replace agecat=5 if p_age>=85 & p_age<90
		 replace agecat=6 if p_age>=90 & p_age<95
		 replace agecat=7 if p_age>=95 & !missing(p_age)

	*Lead and lag variables
	* vm_age is the distance between qtr and the merger quarter.
	* NOTE(review): qtr is used here as it arrives in the input file; it is
	* recomputed from quarter further down in this script - confirm the two agree.
		gen vm_age=qtr-q_of_merger
	* Rows with no computable distance are set to 0.
		 replace vm_age=0 if vm_age==.
	* Pool the tails: everything earlier than -16 becomes -17, everything
	* from 22 onward becomes 22.
		 replace vm_age=-17 if vm_age<-16
		 replace vm_age=22 if vm_age>=22

	* Shift the variable so that its smallest observed value becomes 0.
	qui summ vm_age
		local vm_mn=-r(min)
		 replace vm_age = vm_age + `vm_mn'

	*Sample limitations
	* flag1 is 1 on every row; flag2 and flag3 copy the hypertension and
	* diabetes indicators.
		gen flag1 = 1
		gen flag2 = mxhypertension_p_all
		gen flag3 = mxdiabetes_p_all

	*Death
	* One death date per beneficiary: the smallest BENE_DEATH found on any
	* of that beneficiary's rows.
		egen death_date = min(BENE_DEATH), by(BENE_ID)
		format death_date %td
	* Remove rows dated after the death date. When death_date is missing the
	* comparison is false for any nonmissing quarter, so those rows stay.
	drop if quarter > death_date
	* oc_death is 1 only on the last remaining row of a beneficiary who has
	* a death date.
	sort BENE_ID quarter
	by BENE_ID: gen oc_death = (death_date != .) & (_n == _N)

	*OUTCOME VARIABLES
	* Discard any existing new* variables, then rebuild one onset flag per
	* mx* variable whose name matches mx<word>_<word>...: the flag is named
	* new<word>_<word> and is 1 on a row where the mx variable is 1 and was 0
	* on the same beneficiary's previous row. On a beneficiary's first row
	* the previous value is missing, so the flag is 0.
	drop new*
	sort BENE_ID quarter
		foreach mxvar of varlist mx* {
			if regexm("`mxvar'", "^mx([A-Za-z0-9]+_[A-Za-z0-9]+).*") {
				local stem = regexs(1)
				di "`stem'"
				by BENE_ID: gen new`stem' = (`mxvar' == 1 & `mxvar'[_n-1] == 0)
			}
		}
 

*Heart Disease
*Keep variables since modeling like a hazard model
* For each outcome X there are two variables:
*   oc_X  the renamed mx indicator for the condition
*   kv_X  a keep flag, 1 by default and set to 0 on rows where the condition
*         is on (oc_X==1) but the row is not an onset row (new flag == 0).
* The new* onset flags come from the mx* loop earlier in this script and are
* named after the first two underscore-separated parts of the mx name.
* kv_death is never switched off here.

		gen kv_death=1
		gen kv_acuteicd8 =1
		gen kv_ami       =1
		gen kv_ihd       =1

		rename  mxicd8_acute_p_all    oc_acuteicd8
		rename  mxicd_ami_p_all           oc_ami       
		rename  mxicd_ihd_p_all 		oc_ihd       

	* Full variable name used (was the abbreviation kv_acuteicd, which only
	* works while variable abbreviation is on and the prefix is unambiguous).
		 replace kv_acuteicd8=0 if newicd8_acute     ==0  & oc_acuteicd8==1
		 replace kv_ami=0      if newicd_ami          ==0  & oc_ami==1  
		 replace kv_ihd=0      if newicd_ihd==0  & oc_ihd==1
 
*Diabetes
		gen kv_diabcomp1 =1
		gen kv_diabcomp2 =1
		gen kv_glaucoma  =1
		rename  mxdiab_compl1_p_all   oc_diabcomp1
		rename  mxdiab_compl2_p_all   oc_diabcomp2
		rename  mxicd_glaucoma_p_all      oc_glaucoma  

		 replace kv_diabcomp1=0 if newdiab_compl1==0 & oc_diabcomp1==1
		 replace kv_diabcomp2=0 if newdiab_compl2==0 & oc_diabcomp2==1 
	* The onset flag built from mxicd_glaucoma_p_all is newicd_glaucoma.
	* This line previously tested newglaucoma_p, which is not the flag that
	* belongs to oc_glaucoma (and exists only if a separate mxglaucoma_p*
	* variable is in the data).
		 replace kv_glaucoma=0  if newicd_glaucoma ==0 & oc_glaucoma==1

		 
************
*Weights (at one point I wanted to weight by the escore; eventually chose not to)
*Step 1: turn the log odds ratio (lor) into a propensity score.
* NOTE(review): if lor is the usual log odds of merger==1, the propensity is
* 1/(1+exp(-lor)); the expression below would then be one minus that.
* lor is built outside this file - confirm its sign convention.
gen escore = 1/(1 + exp(lor))
*Step 2: inverse-probability sampling weight - 1/escore for merger==1 rows,
*1/(1-escore) for every other row (including rows where merger is missing).
gen samp_weight = cond(merger == 1, 1/escore, 1/(1 - escore))
*Step 3: the weight is not used - overwrite it with 1 on every row.
replace samp_weight = 1
************


*Geography
*3digit zip

	* Fill blank geography strings within beneficiary, in row order:
	*   pass 1 copies the value from the next row (one step only, because the
	*          next row has not been updated yet when the current row is set);
	*   pass 2 copies the value from the previous row (replace works down the
	*          rows, so a filled value keeps propagating forward);
	*   anything still blank gets the placeholder 000000000.
	sort BENE_ID quarter
		foreach geo of varlist zip_pat fips_state STATE_CODE county BENE_COUNTY {
			by BENE_ID: replace `geo' = `geo'[_n+1] if `geo' == ""
			by BENE_ID: replace `geo' = `geo'[_n-1] if `geo' == ""
			replace `geo' = "000000000" if `geo' == ""
		}

	* First three characters of the patient zip, plus a numeric encoding.
		gen ZIP3 = substr(zip_pat, 1, 3)
		encode ZIP3, gen(zip3dig)
  
		*Quarter and postmerger
		* qtr is rebuilt as the quarterly date of the daily date in quarter;
		* year and nyear (years since 2006) are rebuilt from the same date.
		replace qtr = qofd(quarter)
		format qtr %tq
	capture drop year
		gen year = year(quarter)
	capture drop nyear
		gen nyear = year - 2006

	*mergers
	* One dummy per distinct value of vm_provider1: VMP<value> is 1 when that
	* value appears in vm_provider1, vm_provider2 or vm_provider3.
	* Values seen only in vm_provider2 or vm_provider3 get no dummy.
		levelsof vm_provider1, local(providers)
		foreach p of local providers {
			gen VMP`p' = inlist(`p', vm_provider1, vm_provider2, vm_provider3)
		}

	* For every VMP* variable: zero out missings, then build the interaction
	* with postmerger as pmVMP<value>.
		foreach dummy of varlist VMP* {
			replace `dummy' = 0 if `dummy' == .
			gen pm`dummy' = postmerger * `dummy'
		}

	* vmerger copies merger_seen, with missing recoded to 0.
		gen vmerger = cond(merger_seen == ., 0, merger_seen)
*Specialty
  *Specialty cleanup
  * Rename every psps<code> variable to spec<code>.
		 foreach var of varlist psps* {
			  local i= subinstr("`var'","psps","",.)
			  di "`i'"
			   rename `var' spec`i'
			}
  

	*Family practice
	* fmp starts as spec08 and is switched on for spec01, spec11 and spec93
	* (the four codes labelled fmp in the specialty table at the top of the file).
		 capture drop fmp
		 gen fmp=spec08
		  replace fmp=1 if spec01==1
		  replace fmp=1 if spec11==1
		  replace fmp=1 if spec93==1

 *Non-physician NP and PA  
 * spec35 chiropractor, spec50 nurse practitioner, spec97 physician assistant.
 * This list previously read spec35 spec97 spec66 spec04 spec16: the last
 * three are physician specialties that belong to (and remain in) the Other
 * group below, and the nurse practitioner code spec50 was left out.
 * Nurse anesthetists (spec43) are handled in the Anesthesia group.
		 egen npt = rowtotal( spec35 spec50 spec97 )
		  gen nptf = npt >0

 *Anesthesia
 * anesthesiology (spec05) or nurse anesthetist (spec43)
		 gen anesth = spec05==1
		  replace anesth=1 if spec43==1
  
*Other
* Small physician specialties pooled together: hematology (spec83),
* nephrology (spec39), rheumatology (spec66), allergy (spec04), obgyn (spec16).
		 egen specrt = rowtotal( spec83 spec39 spec66 spec04 spec16 )
		 gen specrtf = specrt>0

*Group: Specialty
* SPEC numbers each distinct combination of the indicators listed.
		egen SPEC=group(fmp nptf anesth specrtf spec30 spec06  spec18 spec48  spec20 spec07 spec34 spec05 spec02 spec10 spec41 spec26 spec13 spec29  )

*State
*destring STATE_encode, gen(state)
*Note that State rarely varies within propScoreId, so controlling for it isn't that important
	capture drop STATE_encode
		encode fips_state, gen(STATE_encode)
		gen state=STATE_encode
	* Pool the listed states into a single category 0.
	* NOTE(review): the numbers are the integer codes assigned by encode, which
	* depend on the set of fips_state values present in the data, not FIPS
	* codes - confirm they still point at the intended states if the input changes.
		 replace state=0 if inlist(state,  38 , 49 ,  9 , 46 , 29 , 2 ,  30, 25, 6 , 27 , 52 , 41 ,  13 , 8 , 28, 32 , 40, 47 , 12)
		 replace state=0 if inlist(state,  44 , 50 , 33 , 43 , 22 , 16 ,  3 ,  1 , 20 , 19 , 31 , 11 , 37 , 21, 5)
 
	* Indicator for urban_rural equal to 1.
	 gen urban_rural1=urban_rural==1

 *zip and white
 * rac_white flags RACE1==1; zipwhite numbers each zip-by-rac_white cell.
		 gen rac_white = RACE1==1
		 egen zipwhite = group(zip_pat rac_white)
   

 *age
 * Top-code age at 96. Missing is excluded explicitly: Stata ranks missing
 * above every number, so "age>95" alone would recode missing ages to 96.
		  replace age = 96 if age>95 & !missing(age)

 * Save the pooled analysis file, then the two sex-specific subsamples.
 * NOTE(review): all three files are written to the current working
 * directory, not to one of the Data-Out globals - confirm that is intended.
 save  ./LPManalysis.dta, replace
 
 * Female sample (SEX2==1): drop merger rows with no merger quarter and all
 * of 2005. clear is given on use so the reload never depends on whether the
 * data in memory count as changed.
 use ./LPManalysis.dta, clear
  keep if SEX2==1
  drop if q_of_merger==. & vmerger==1
  drop if year(quarter)==2005
 save femLPManalysis2.dta, replace
  

 * Male sample (SEX1==1): same restrictions.
 use ./LPManalysis.dta, clear
  keep if SEX1==1
  drop if q_of_merger==. & vmerger==1
  drop if year(quarter)==2005
  save maleLPManalysis2.dta, replace
clear

* Close the log opened at the top of the do-file.
log close
 